import pandas as pd
import numpy as np
import pymc as pm
import matplotlib.pyplot as plt
import arviz as az
import seaborn as sns# Set random seed for reproducibility
np.random.seed(42)
# ============================================================================
# SIMULATE DATA
# ============================================================================
# Simulate some jury voting data
n_cases = 50 # number of cases
n_jurors = 15 # number of jurors
# True states (1 = guilty, 0 = not guilty)
true_states = np.random.binomial(1, 0.5, n_cases)
# Individual juror competencies (for simulation)
true_p = 0.7 # average competence
true_discrimination = 0.15 # how much competencies vary (sd in logit space)
# Simulate heterogeneous competencies
logit_p_jurors = np.random.normal(np.log(true_p / (1 - true_p)),
true_discrimination,
n_jurors)
p_jurors = 1 / (1 + np.exp(-logit_p_jurors))
# Simulate votes
votes = np.zeros((n_cases, n_jurors))
for i in range(n_cases):
for j in range(n_jurors):
if true_states[i] == 1:
votes[i, j] = np.random.binomial(1, p_jurors[j])
else:
votes[i, j] = np.random.binomial(1, 1 - p_jurors[j])
print(f"Data simulated: {n_cases} cases, {n_jurors} jurors")
print(f"True average competence: {p_jurors.mean():.3f}")
print(f"Majority vote accuracy: {(votes.mean(axis=1) > 0.5).mean():.3f}")Data simulated: 50 cases, 15 jurors
True average competence: 0.696
Majority vote accuracy: 0.400
# Define different prior specifications
prior_specs = {
'weakly_informative': {'alpha': 3, 'beta': 2, 'desc': 'Weakly informative (centered at 0.6)'},
'strong_competence': {'alpha': 10, 'beta': 5, 'desc': 'Strong prior (p ~ 0.67)'},
'barely_competent': {'alpha': 6, 'beta': 5, 'desc': 'Skeptical prior (p ~ 0.55)'},
'incompetent': {'alpha': 5, 'beta': 10, 'desc': 'Incompetent prior (p ~ 0.33)'},
}def fit_condorcet_base_model(votes, spec):
with pm.Model() as model:
# SENSITIVITY PARAMETER: Prior on competence
# This is our first example of treating assumptions as parameters
p = pm.Beta('p', alpha=spec['alpha'], beta=spec['beta'])
# True state of each case (latent)
true_state = pm.Bernoulli('true_state', p=0.5, shape=n_cases)
# Likelihood
vote_prob = pm.math.switch(
pm.math.eq(true_state[:, None], 1),
p,
1 - p
)
likelihood = pm.Bernoulli('votes', p=vote_prob, observed=votes)
# Posterior predictive: majority vote accuracy for different jury sizes
jury_sizes_eval = [3, 7, 15]
for size in jury_sizes_eval:
# Simulate votes for a new case (truth = 1)
votes_sim = pm.Bernoulli(f'sim_votes_{size}', p=p, shape=size)
majority_correct = pm.Deterministic(
f'majority_correct_{size}',
pm.math.sum(votes_sim) > size / 2
)
# Sample
idata = pm.sample_prior_predictive()
idata.extend(pm.sample(2000, tune=1000, random_seed=42,
target_accept=0.95, return_inferencedata=True))
idata.extend(pm.sample_posterior_predictive(idata))
return idata, model
traces = {}
for prior_name, spec in prior_specs.items():
print(f"\nFitting with {spec['desc']}...")
idata, model = fit_condorcet_base_model(votes, spec)
traces[prior_name] = idata
traces[prior_name + '_model'] = model
Fitting with Weakly informative (centered at 0.6)...
Sampling: [p, sim_votes_15, sim_votes_3, sim_votes_7, true_state, votes]
Multiprocess sampling (4 chains in 4 jobs)
CompoundStep
>NUTS: [p]
>BinaryGibbsMetropolis: [true_state, sim_votes_3, sim_votes_7, sim_votes_15]
Sampling 4 chains for 1_000 tune and 2_000 draw iterations (4_000 + 8_000 draws total) took 6 seconds.
/Users/nathanielforde/mambaforge/envs/applied-bayesian-regression-modeling-env/lib/python3.13/site-packages/arviz/stats/diagnostics.py:596: RuntimeWarning: invalid value encountered in scalar divide
(between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)
Sampling: [votes]
Sampling: [p, sim_votes_15, sim_votes_3, sim_votes_7, true_state, votes]
Fitting with Strong prior (p ~ 0.67)...
Multiprocess sampling (4 chains in 4 jobs)
CompoundStep
>NUTS: [p]
>BinaryGibbsMetropolis: [true_state, sim_votes_3, sim_votes_7, sim_votes_15]
Sampling 4 chains for 1_000 tune and 2_000 draw iterations (4_000 + 8_000 draws total) took 6 seconds.
/Users/nathanielforde/mambaforge/envs/applied-bayesian-regression-modeling-env/lib/python3.13/site-packages/arviz/stats/diagnostics.py:596: RuntimeWarning: invalid value encountered in scalar divide
(between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)
Sampling: [votes]
Sampling: [p, sim_votes_15, sim_votes_3, sim_votes_7, true_state, votes]
Fitting with Skeptical prior (p ~ 0.55)...
Multiprocess sampling (4 chains in 4 jobs)
CompoundStep
>NUTS: [p]
>BinaryGibbsMetropolis: [true_state, sim_votes_3, sim_votes_7, sim_votes_15]
Sampling 4 chains for 1_000 tune and 2_000 draw iterations (4_000 + 8_000 draws total) took 6 seconds.
/Users/nathanielforde/mambaforge/envs/applied-bayesian-regression-modeling-env/lib/python3.13/site-packages/arviz/stats/diagnostics.py:596: RuntimeWarning: invalid value encountered in scalar divide
(between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)
Sampling: [votes]
Sampling: [p, sim_votes_15, sim_votes_3, sim_votes_7, true_state, votes]
Fitting with Incompetent prior (p ~ 0.33)...
Multiprocess sampling (4 chains in 4 jobs)
CompoundStep
>NUTS: [p]
>BinaryGibbsMetropolis: [true_state, sim_votes_3, sim_votes_7, sim_votes_15]
Sampling 4 chains for 1_000 tune and 2_000 draw iterations (4_000 + 8_000 draws total) took 6 seconds.
/Users/nathanielforde/mambaforge/envs/applied-bayesian-regression-modeling-env/lib/python3.13/site-packages/arviz/stats/diagnostics.py:596: RuntimeWarning: invalid value encountered in scalar divide
(between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)
Sampling: [votes]
print("\n" + "="*70)
print("PRIOR SENSITIVITY RESULTS")
print("="*70)
ests = {}
for prior_name in prior_specs.keys():
for i in [3, 7, 15]:
p = traces[prior_name]['prior'][f'majority_correct_{i}'].mean().item()
if prior_name in ests:
ests[prior_name].append(p)
else:
ests[prior_name] = [p]
prior_estimates = pd.DataFrame(ests, index=['Correct % for Majority of 3', 'Correct % for Majority of 7', 'Correct % for Majority of 15'])
prior_estimates
======================================================================
PRIOR SENSITIVITY RESULTS
======================================================================
| weakly_informative | strong_competence | barely_competent | incompetent | |
|---|---|---|---|---|
| Correct % for Majority of 3 | 0.652 | 0.726 | 0.540 | 0.282 |
| Correct % for Majority of 7 | 0.672 | 0.806 | 0.578 | 0.200 |
| Correct % for Majority of 15 | 0.708 | 0.824 | 0.610 | 0.144 |
print("\n" + "="*70)
print("PRIOR SENSITIVITY RESULTS")
print("="*70)
ests = {}
for prior_name in prior_specs.keys():
for i in [3, 7, 15]:
p = traces[prior_name]['posterior'][f'majority_correct_{i}'].mean().item()
if prior_name in ests:
ests[prior_name].append(p)
else:
ests[prior_name] = [p]
posterior_estimates = pd.DataFrame(ests, index=['Correct % for Majority of 3', 'Correct % for Majority of 7', 'Correct % for Majority of 15'])
posterior_estimates
======================================================================
PRIOR SENSITIVITY RESULTS
======================================================================
| weakly_informative | strong_competence | barely_competent | incompetent | |
|---|---|---|---|---|
| Correct % for Majority of 3 | 0.776750 | 0.777750 | 0.776125 | 0.22150 |
| Correct % for Majority of 7 | 0.868375 | 0.866750 | 0.868000 | 0.13225 |
| Correct % for Majority of 15 | 0.949000 | 0.948875 | 0.945375 | 0.05000 |
fig, axs = plt.subplots(1, 2, figsize=(20, 5))
axs = axs.flatten()
jitters = np.linspace(-0.4, 0.4, 3)
for prior_name in prior_specs.keys():
axs[0].plot(prior_estimates.index, prior_estimates[prior_name], label=prior_name + ' prior', marker='o')
axs[1].plot(posterior_estimates.index, posterior_estimates[prior_name], label=prior_name + ' prior', marker='o')
axs[0].legend()
axs[1].legend()
axs[0].set_title("Prior Estimates for Majority Accuracy");
axs[1].set_title("Posterior Estimates for Majority Accuracy");# Define different priors on discrimination parameter
discrimination_priors = {
'weak_discrimination': {'sigma': 0.5, 'desc': 'Weak discrimination prior (σ ~ 0.25)'},
'moderate_discrimination': {'sigma': 1.0, 'desc': 'Moderate discrimination prior (σ ~ 0.5)'},
'strong_discrimination': {'sigma': 2.0, 'desc': 'Strong discrimination prior (σ ~ 2)'},
}
def fit_discrimination_binomial_model(votes, n_jurors, priors):
majority_votes = (votes.mean(axis=1) > 0.5).astype(int)
agreements_per_juror = np.array([(votes[:, j] == majority_votes).sum() for j in range(n_jurors)])
empirical_competence = agreements_per_juror / n_cases
with pm.Model() as sensitivity_model:
# Hyperpriors for the population distribution
mu_logit_p = pm.Normal('mu_logit_p', mu=0.6, sigma=0.5)
# KEY SENSITIVITY PARAMETER: individual discrimination
sigma_logit_p = pm.HalfNormal('sigma_logit_p', sigma=spec['sigma'])
# NON-CENTERED PARAMETERIZATION for better sampling
# Use: logit_p_juror = mu + sigma * z, where z ~ Normal(0, 1)
z_juror = pm.Normal('z_juror', mu=0, sigma=1, shape=n_jurors)
logit_p_juror = pm.Deterministic('logit_p_juror',
mu_logit_p + sigma_logit_p * z_juror)
p_juror = pm.Deterministic('p_juror', pm.math.invlogit(logit_p_juror))
# Mean competence
mean_p = pm.Deterministic('mean_p', pm.math.invlogit(mu_logit_p))
# Likelihood
pm.Binomial('agreements',
n=n_cases,
p=p_juror,
observed=agreements_per_juror)
# Sample with non-centered parameterization
idata = pm.sample_prior_predictive()
idata.extend(pm.sample(
1000,
tune=2000,
random_seed=42,
target_accept=0.95,
return_inferencedata=True,
idata_kwargs={"log_likelihood": True}
)
)
idata.extend(pm.sample_posterior_predictive(idata))
return idata, model
traces_discrimination = {}
for prior_name, spec in discrimination_priors.items():
print(f"\nFitting with {spec['desc']}...")
idata, model = fit_discrimination_binomial_model(votes, n_jurors, spec)
traces_discrimination[prior_name] = idata
traces_discrimination[prior_name + '_model'] = modelSampling: [agreements, mu_logit_p, sigma_logit_p, z_juror]
Initializing NUTS using jitter+adapt_diag...
Fitting with Weak discrimination prior (σ ~ 0.25)...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [mu_logit_p, sigma_logit_p, z_juror]
Sampling 4 chains for 2_000 tune and 1_000 draw iterations (8_000 + 4_000 draws total) took 2 seconds.
Sampling: [agreements]
Sampling: [agreements, mu_logit_p, sigma_logit_p, z_juror]
Initializing NUTS using jitter+adapt_diag...
Fitting with Moderate discrimination prior (σ ~ 0.5)...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [mu_logit_p, sigma_logit_p, z_juror]
Sampling 4 chains for 2_000 tune and 1_000 draw iterations (8_000 + 4_000 draws total) took 2 seconds.
Sampling: [agreements]
Sampling: [agreements, mu_logit_p, sigma_logit_p, z_juror]
Initializing NUTS using jitter+adapt_diag...
Fitting with Strong discrimination prior (σ ~ 2)...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [mu_logit_p, sigma_logit_p, z_juror]
Sampling 4 chains for 2_000 tune and 1_000 draw iterations (8_000 + 4_000 draws total) took 2 seconds.
Sampling: [agreements]
import numpy as np
# ============================================================
# CONSOLIDATED POSTERIOR PREDICTIVE WORKFLOW
# ============================================================
# Required inputs:
# idata : InferenceData from collapsed (binomial) model
# n_cases : number of cases
# n_jurors : number of jurors
# jury_sizes : list of jury sizes to evaluate (e.g. [3, 5, 7, 10, 15])
jury_sizes = [3, 5, 7, 10, 15]
# ------------------------------------------------------------
# 1. Generative expansion: truth -> votes
# ------------------------------------------------------------
def simulate_votes(p_juror, n_cases, truth=None):
"""
p_juror : (n_jurors,)
returns:
truth : (n_cases,)
votes : (n_cases, n_jurors)
"""
if truth is None:
truth = np.random.binomial(1, 0.5, size=n_cases)
votes = np.zeros((n_cases, n_jurors), dtype=int)
for i in range(n_cases):
for j in range(n_jurors):
prob = p_juror[j] if truth[i] == 1 else 1 - p_juror[j]
votes[i, j] = np.random.binomial(1, prob)
return truth, votes
# ------------------------------------------------------------
# 3. Diagnostic functions
# ------------------------------------------------------------
def majority_accuracy(votes, truth):
majority = votes.mean(axis=1) > 0.5
return np.mean(majority == truth)
def unanimity_rate(votes):
return np.mean(
(votes.sum(axis=1) == 0) |
(votes.sum(axis=1) == votes.shape[1])
)
def juror_agreement_rates(votes, truth):
return np.mean(votes == truth[:, None], axis=0)
def error_correlation(votes, truth):
errors = votes != truth[:, None]
return np.corrcoef(errors.T)
def majority_accuracy_for_size(votes, truth, jury_size):
n_cases, n_jurors = votes.shape
correct = np.zeros(n_cases, dtype=int)
for i in range(n_cases):
jurors = np.random.choice(
n_jurors, size=jury_size, replace=False
)
sub_votes = votes[i, jurors]
majority = sub_votes.mean() > 0.5
correct[i] = (majority == truth[i])
return correct.mean()
# ------------------------------------------------------------
# 4. Run posterior predictive simulations
# ------------------------------------------------------------
def run_post_fit_ppc(p_juror_samples, n_cases, truth=None, jury_sizes=[3, 5, 7, 10, 15]):
# Storage
n_samples = p_juror_samples.shape[1]
n_jurors = p_juror_samples.shape[0]
ppc_results = {
"majority_accuracy_15": np.zeros(n_samples),
"unanimity_rate_15": np.zeros(n_samples),
"agreement_rates": np.zeros((n_samples, n_jurors)),
"error_corr": np.zeros((n_samples, n_jurors, n_jurors)),
"majority_accuracy_by_size": {
k: np.zeros(n_samples) for k in jury_sizes
}
}
for s in range(n_samples):
truth_s, votes_s = simulate_votes(
p_juror_samples[:, s],
n_cases,
truth
)
# Full jury diagnostics
ppc_results["majority_accuracy_15"][s] = (
majority_accuracy(votes_s, truth_s)
)
ppc_results["unanimity_rate_15"][s] = (
unanimity_rate(votes_s)
)
ppc_results["agreement_rates"][s] = (
juror_agreement_rates(votes_s, truth_s)
)
ppc_results["error_corr"][s] = (
error_correlation(votes_s, truth_s)
)
# Sub-jury diagnostics
for k in jury_sizes:
ppc_results["majority_accuracy_by_size"][k][s] = (
majority_accuracy_for_size(votes_s, truth_s, k)
)
return ppc_results
def summarise_error_corr(ppc_results):
corr = ppc_results["error_corr"] # (samples, jurors, jurors)
n = corr.shape[1]
off_diag = []
for s in range(corr.shape[0]):
mat = corr[s]
off_diag.append(
mat[np.triu_indices(n, k=1)]
)
off_diag = np.concatenate(off_diag)
return {
"mean_off_diag": off_diag.mean(),
"sd_off_diag": off_diag.std(),
"p95_abs_corr": np.percentile(np.abs(off_diag), 95),
}
# ------------------------------------------------------------
# 5. Summaries (example outputs)
# ------------------------------------------------------------
def summarise_post_fit_ppc(ppc_results):
print("\n=== Majority accuracy (full jury) ===")
a = np.percentile(
ppc_results["majority_accuracy_15"], [5, 50, 95]
)
print(a)
print("\n=== Unanimity rate (full jury) ===")
b = np.percentile(
ppc_results["unanimity_rate_15"], [5, 50, 95]
)
print(b)
summaries = []
percentiles = [5, 50, 95]
print("\n=== Majority accuracy by jury size ===")
for k in jury_sizes:
print(f"Jury size {k}:")
c = np.percentile(
ppc_results["majority_accuracy_by_size"][k],
percentiles
)
print(c)
summaries.append(c)
print("\n=== Mean juror agreement rates ===")
d = ppc_results["agreement_rates"].mean(axis=0)
print(d)
summaries.append(d)
summaries_df = pd.DataFrame(summaries[:-1]).T
majorities = [f'majority_accuracy_{i}' for i in jury_sizes]
columns = majorities
summaries_df.columns = columns
summaries_df.index = [f'percentile_{i}' for i in percentiles]
return summaries_df
def compare_prior_posterior(idata):
p_juror_samples_prior = (idata.prior["p_juror"].stack(sample=("chain", "draw")).values)
p_juror_samples_posterior = (idata.posterior["p_juror"].stack(sample=("chain", "draw")).values)
n_jurors, n_samples = p_juror_samples_posterior.shape
ppc_result_prior = run_post_fit_ppc(p_juror_samples_prior, n_cases, true_states)
ppc_result_posterior = run_post_fit_ppc(p_juror_samples_posterior, n_cases, true_states)
summaries_prior = summarise_post_fit_ppc(ppc_result_prior)
summaries_posterior = summarise_post_fit_ppc(ppc_result_posterior)
summary = pd.concat({'prior': summaries_prior, 'posterior': summaries_posterior})
return summary, ppc_result_posteriorCode
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
def plot_differences(df):
# 1. Clean up column names to get numeric x-axis values
# Assuming your df is named 'df'
x_values = [3, 5, 7, 10, 15] # Extracted from your column headers
# If you have 'majority_accuracy_15' as mentioned in your text, add 15 to this list.
# 2. Extract the specific rows for plotting
prior_median = df.loc[('prior', 'percentile_50')]
prior_low = df.loc[('prior', 'percentile_5')]
prior_high = df.loc[('prior', 'percentile_95')]
post_median = df.loc[('posterior', 'percentile_50')]
post_low = df.loc[('posterior', 'percentile_5')]
post_high = df.loc[('posterior', 'percentile_95')]
# 3. Create the plot
plt.figure(figsize=(10, 6))
# Plot Prior
plt.plot(x_values, prior_median, label='Prior Median', color='blue', marker='o')
plt.fill_between(x_values, prior_low, prior_high, color='blue', alpha=0.2, label='Prior (5th-95th)')
# Plot Posterior
plt.plot(x_values, post_median, label='Posterior Median', color='red', marker='o')
plt.fill_between(x_values, post_low, post_high, color='red', alpha=0.2, label='Posterior (5th-95th)')
# Formatting
plt.title('Majority Accuracy: Prior vs Posterior Distributions')
plt.xlabel('Number of Jurors (n) in Majority Calculation')
plt.ylabel('Majority Accuracy Score')
plt.xticks(x_values)
plt.legend()
plt.grid(True, linestyle='--', alpha=0.6)
plt.show()
def plot_error_corr_heatmap(ppc_results, title="Error correlation"):
mean_corr = ppc_results["error_corr"].mean(axis=0)
plt.figure(figsize=(6, 5))
sns.heatmap(
mean_corr,
vmin=-0.3,
vmax=0.3,
cmap="coolwarm",
square=True,
cbar_kws={"label": "Error correlation"}
)
plt.title(title)
plt.tight_layout()
plt.show()summaries_weak_discrimination, ppc_result_posterior_weak_discrimination = compare_prior_posterior(traces_discrimination['weak_discrimination'])
summaries_weak_discrimination/Users/nathanielforde/mambaforge/envs/applied-bayesian-regression-modeling-env/lib/python3.13/site-packages/numpy/lib/_function_base_impl.py:3065: RuntimeWarning: invalid value encountered in divide
c /= stddev[:, None]
/Users/nathanielforde/mambaforge/envs/applied-bayesian-regression-modeling-env/lib/python3.13/site-packages/numpy/lib/_function_base_impl.py:3066: RuntimeWarning: invalid value encountered in divide
c /= stddev[None, :]
=== Majority accuracy (full jury) ===
[0.36 0.86 1. ]
=== Unanimity rate (full jury) ===
[0. 0. 0.04]
=== Majority accuracy by jury size ===
Jury size 3:
[0.4 0.7 0.921]
Jury size 5:
[0.4 0.74 0.96]
Jury size 7:
[0.38 0.76 0.98]
Jury size 10:
[0.42 0.82 1. ]
Jury size 15:
[0.36 0.86 1. ]
=== Mean juror agreement rates ===
[0.61964 0.64204 0.6262 0.63484 0.63676 0.64796 0.63656 0.6322 0.63752
0.641 0.62172 0.62816 0.62896 0.63316 0.639 ]
=== Majority accuracy (full jury) ===
[0.9 0.96 1. ]
=== Unanimity rate (full jury) ===
[0. 0. 0.02]
=== Majority accuracy by jury size ===
Jury size 3:
[0.68 0.8 0.88]
Jury size 5:
[0.74 0.84 0.92]
Jury size 7:
[0.8 0.88 0.96]
Jury size 10:
[0.84 0.92 0.98]
Jury size 15:
[0.9 0.96 1. ]
=== Mean juror agreement rates ===
[0.694615 0.70128 0.706215 0.700695 0.700355 0.70774 0.693205 0.70913
0.703775 0.708515 0.71373 0.71302 0.70089 0.705545 0.703755]
| majority_accuracy_3 | majority_accuracy_5 | majority_accuracy_7 | majority_accuracy_10 | majority_accuracy_15 | ||
|---|---|---|---|---|---|---|
| prior | percentile_5 | 0.400 | 0.40 | 0.38 | 0.42 | 0.36 |
| percentile_50 | 0.700 | 0.74 | 0.76 | 0.82 | 0.86 | |
| percentile_95 | 0.921 | 0.96 | 0.98 | 1.00 | 1.00 | |
| posterior | percentile_5 | 0.680 | 0.74 | 0.80 | 0.84 | 0.90 |
| percentile_50 | 0.800 | 0.84 | 0.88 | 0.92 | 0.96 | |
| percentile_95 | 0.880 | 0.92 | 0.96 | 0.98 | 1.00 |
plot_error_corr_heatmap(ppc_result_posterior_weak_discrimination)
summarise_error_corr(ppc_result_posterior_weak_discrimination){'mean_off_diag': np.float64(-0.0002439357891321022),
'sd_off_diag': np.float64(0.14286033462137696),
'p95_abs_corr': np.float64(0.27759801501994014)}
plot_differences(summaries_weak_discrimination)summaries_moderate_discrimination, ppc_result_posterior_moderate_discrimination = compare_prior_posterior(traces_discrimination['moderate_discrimination'])
summaries_moderate_discrimination/Users/nathanielforde/mambaforge/envs/applied-bayesian-regression-modeling-env/lib/python3.13/site-packages/numpy/lib/_function_base_impl.py:3065: RuntimeWarning: invalid value encountered in divide
c /= stddev[:, None]
/Users/nathanielforde/mambaforge/envs/applied-bayesian-regression-modeling-env/lib/python3.13/site-packages/numpy/lib/_function_base_impl.py:3066: RuntimeWarning: invalid value encountered in divide
c /= stddev[None, :]
=== Majority accuracy (full jury) ===
[0.32 0.86 1. ]
=== Unanimity rate (full jury) ===
[0. 0. 0.04]
=== Majority accuracy by jury size ===
Jury size 3:
[0.4 0.7 0.9]
Jury size 5:
[0.42 0.74 0.96]
Jury size 7:
[0.38 0.76 0.98]
Jury size 10:
[0.38 0.81 1. ]
Jury size 15:
[0.32 0.86 1. ]
=== Mean juror agreement rates ===
[0.62964 0.6278 0.63744 0.63632 0.617 0.6254 0.61976 0.63256 0.63128
0.62652 0.63736 0.63448 0.62104 0.6332 0.63088]
=== Majority accuracy (full jury) ===
[0.9 0.96 1. ]
=== Unanimity rate (full jury) ===
[0. 0. 0.02]
=== Majority accuracy by jury size ===
Jury size 3:
[0.68 0.8 0.88]
Jury size 5:
[0.74 0.84 0.92]
Jury size 7:
[0.78 0.88 0.96]
Jury size 10:
[0.84 0.92 0.98]
Jury size 15:
[0.9 0.96 1. ]
=== Mean juror agreement rates ===
[0.69193 0.700095 0.704435 0.700705 0.700585 0.707545 0.69066 0.711665
0.70335 0.708425 0.70878 0.71398 0.69932 0.70703 0.70433 ]
| majority_accuracy_3 | majority_accuracy_5 | majority_accuracy_7 | majority_accuracy_10 | majority_accuracy_15 | ||
|---|---|---|---|---|---|---|
| prior | percentile_5 | 0.40 | 0.42 | 0.38 | 0.38 | 0.32 |
| percentile_50 | 0.70 | 0.74 | 0.76 | 0.81 | 0.86 | |
| percentile_95 | 0.90 | 0.96 | 0.98 | 1.00 | 1.00 | |
| posterior | percentile_5 | 0.68 | 0.74 | 0.78 | 0.84 | 0.90 |
| percentile_50 | 0.80 | 0.84 | 0.88 | 0.92 | 0.96 | |
| percentile_95 | 0.88 | 0.92 | 0.96 | 0.98 | 1.00 |
plot_error_corr_heatmap(ppc_result_posterior_moderate_discrimination)
summarise_error_corr(ppc_result_posterior_moderate_discrimination){'mean_off_diag': np.float64(0.0001145121720577443),
'sd_off_diag': np.float64(0.1429544471193103),
'p95_abs_corr': np.float64(0.27695585470349854)}
plot_differences(summaries_moderate_discrimination)summaries_strong_discrimination, ppc_result_posterior_strong_discrimination = compare_prior_posterior(traces_discrimination['strong_discrimination'])
summaries_strong_discrimination/Users/nathanielforde/mambaforge/envs/applied-bayesian-regression-modeling-env/lib/python3.13/site-packages/numpy/lib/_function_base_impl.py:3065: RuntimeWarning: invalid value encountered in divide
c /= stddev[:, None]
/Users/nathanielforde/mambaforge/envs/applied-bayesian-regression-modeling-env/lib/python3.13/site-packages/numpy/lib/_function_base_impl.py:3066: RuntimeWarning: invalid value encountered in divide
c /= stddev[None, :]
=== Majority accuracy (full jury) ===
[0.159 0.86 1. ]
=== Unanimity rate (full jury) ===
[0. 0. 0.02]
=== Majority accuracy by jury size ===
Jury size 3:
[0.34 0.68 0.9 ]
Jury size 5:
[0.32 0.72 0.94]
Jury size 7:
[0.26 0.74 0.96]
Jury size 10:
[0.26 0.82 0.98]
Jury size 15:
[0.159 0.86 1. ]
=== Mean juror agreement rates ===
[0.60028 0.61212 0.59256 0.60508 0.60072 0.6142 0.63324 0.62096 0.60088
0.58748 0.62488 0.60488 0.60544 0.59912 0.60396]
=== Majority accuracy (full jury) ===
[0.9 0.96 1. ]
=== Unanimity rate (full jury) ===
[0. 0. 0.02]
=== Majority accuracy by jury size ===
Jury size 3:
[0.68 0.8 0.88]
Jury size 5:
[0.74 0.84 0.92]
Jury size 7:
[0.78 0.88 0.96]
Jury size 10:
[0.84 0.92 0.98]
Jury size 15:
[0.9 0.96 1. ]
=== Mean juror agreement rates ===
[0.693955 0.701255 0.706355 0.70056 0.69895 0.707245 0.692345 0.71037
0.70129 0.710385 0.711215 0.7117 0.7015 0.708655 0.702275]
| majority_accuracy_3 | majority_accuracy_5 | majority_accuracy_7 | majority_accuracy_10 | majority_accuracy_15 | ||
|---|---|---|---|---|---|---|
| prior | percentile_5 | 0.34 | 0.32 | 0.26 | 0.26 | 0.159 |
| percentile_50 | 0.68 | 0.72 | 0.74 | 0.82 | 0.860 | |
| percentile_95 | 0.90 | 0.94 | 0.96 | 0.98 | 1.000 | |
| posterior | percentile_5 | 0.68 | 0.74 | 0.78 | 0.84 | 0.900 |
| percentile_50 | 0.80 | 0.84 | 0.88 | 0.92 | 0.960 | |
| percentile_95 | 0.88 | 0.92 | 0.96 | 0.98 | 1.000 |
plot_error_corr_heatmap(ppc_result_posterior_strong_discrimination)
summarise_error_corr(ppc_result_posterior_strong_discrimination){'mean_off_diag': np.float64(7.785091800380223e-05),
'sd_off_diag': np.float64(0.14292377897521807),
'p95_abs_corr': np.float64(0.2775980150199401)}
plot_differences(summaries_strong_discrimination)Case Difficulty
def ppc_with_case_difficulty(
idata,
n_cases,
sigma_case,
rng=np.random.default_rng(123),
true_states = None
):
"""
PPC generator with shared case-level shocks.
"""
logit_p = (idata.posterior['logit_p_juror']
.stack(sample=("chain", "draw")).values)
n_jurors, n_samples = logit_p.shape
truth = np.zeros((n_samples, n_cases))
if true_states is None:
true_states = rng.binomial(1, 0.5, size=n_cases)
truth[:, ] = true_states
delta_case = rng.normal(0.0, sigma_case, size=(n_samples, n_cases))
votes = np.zeros((n_samples, n_cases, n_jurors), dtype=int)
for s in range(n_samples):
for i in range(n_cases):
sign = 1 if truth[s, i] == 1 else -1
logits = sign * logit_p[:, s] + delta_case[s, i]
p = 1 / (1 + np.exp(-logits))
votes[s, i] = rng.binomial(1, p)
return {
"votes": votes,
"true_state": truth,
"delta_case": delta_case,
}
for sigma in [0.0, 0.2, 0.5, 1]:
ppc = ppc_with_case_difficulty(
traces_discrimination['weak_discrimination'],
n_cases=50,
sigma_case=sigma,
true_states=true_states
)
n_samples = ppc['votes'].shape[0]
corrs = []
for s in range(n_samples):
C = error_correlation(ppc["votes"][s, :], ppc['true_state'][s])
corrs.append(C)
corrs = np.stack(corrs)
off_diag = corrs[:, ~np.eye(corrs.shape[1], dtype=bool)]
acc = [majority_accuracy(ppc["votes"][i, :, :], ppc["true_state"][i, :]) for i in range(n_samples)]
print(f"\nσ_case = {sigma}")
print("Mean majority accuracy:", np.mean(acc))
print("mean_corr", np.nanmean(off_diag))
print("median_corr", np.nanmedian(off_diag))
print("p95_abs_corr", np.nanpercentile(np.abs(off_diag), 95))
print("nan_fraction", np.isnan(off_diag).mean())
σ_case = 0.0
Mean majority accuracy: 0.9528000000000001
mean_corr -0.0004735283739258481
median_corr -0.006593661395050807
p95_abs_corr 0.2762832423274467
nan_fraction 0.0
σ_case = 0.2
Mean majority accuracy: 0.940575
mean_corr 0.008204727656772869
median_corr 0.005636260652626381
p95_abs_corr 0.2777155134201167
nan_fraction 0.0
σ_case = 0.5
Mean majority accuracy: 0.8855
mean_corr 0.04870339440464984
median_corr 0.04761904761904767
p95_abs_corr 0.30012252399939066
nan_fraction 0.0
σ_case = 1
Mean majority accuracy: 0.777865
mean_corr 0.16094673060834333
median_corr 0.16390251702679648
p95_abs_corr 0.40247590361231644
nan_fraction 0.0
Citation
BibTeX citation:
@online{forde,
author = {Forde, Nathaniel},
title = {The {Condorcet} {Jury} {Theorem} and {Democratic}
{Rationality}},
langid = {en}
}
For attribution, please cite this work as:
Forde, Nathaniel. n.d. “The Condorcet Jury Theorem and Democratic
Rationality.”